cluster_name: {{cluster_name_on_cloud}}

# The maximum number of workers nodes to launch in addition to the head node.
max_workers: {{num_nodes - 1}}
upscaling_speed: {{num_nodes - 1}}
idle_timeout_minutes: 60

{%- if docker_image is not none %}
docker:
  image: {{docker_image}}
  container_name: {{docker_container_name}}
  run_options:
    - --ulimit nofile=1048576:1048576
    {%- for run_option in docker_run_options %}
    - {{run_option}}
    {%- endfor %}
  {%- if docker_login_config is not none %}
  docker_login_config:
    username: |-
      {{docker_login_config.username}}
    password: |-
      {{docker_login_config.password | indent(6) }}
    server: |-
      {{docker_login_config.server}}
  {%- endif %}
{%- endif %}

provider:
  type: external
  module: sky.provision.lambda
  region: {{region}}
  # Disable launch config check for worker nodes as it can cause resource
  # leakage.
  # Reference: https://github.com/ray-project/ray/blob/cd1ba65e239360c8a7b130f991ed414eccc063ce/python/ray/autoscaler/_private/autoscaler.py#L1115
  # The upper-level SkyPilot code has make sure there will not be resource
  # leakage.
  disable_launch_config_check: true

auth:
  ssh_user: ubuntu
  ssh_private_key: {{ssh_private_key}}

available_node_types:
  ray_head_default:
    resources: {}
    node_config:
      InstanceType: {{instance_type}}

head_node_type: ray_head_default

# Format: `REMOTE_PATH : LOCAL_PATH`
file_mounts: {
  "{{sky_ray_yaml_remote_path}}": "{{sky_ray_yaml_local_path}}",
  "{{sky_remote_path}}/{{sky_wheel_hash}}": "{{sky_local_path}}",
{%- for remote_path, local_path in credentials.items() %}
  "{{remote_path}}": "{{local_path}}",
  "~/.ssh/sky-cluster-key": "{{ssh_private_key}}",
{%- endfor %}
}

rsync_exclude: []

initialization_commands: []

# List of shell commands to run to set up nodes.
# NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
# connection, which is expensive. Try your best to co-locate commands into fewer
# items!
#
# Increment the following for catching performance bugs easier:
#   current num items (num SSH connections): 1
setup_commands:
  # Disable `unattended-upgrades` to prevent apt-get from hanging. It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.)
  # Add ~/.ssh/sky-cluster-key to SSH config to allow nodes within a cluster to connect to each other
  # Line 'rm ..': there is another installation of pip.
  # Line 'sudo bash ..': set the ulimit as suggested by ray docs for performance. https://docs.ray.io/en/latest/cluster/vms/user-guides/large-cluster-best-practices.html#system-configuration
  # Line 'sudo grep ..': set the number of threads per process to unlimited to avoid ray job submit stucking issue when the number of running ray jobs increase.
  # Line 'mkdir -p ..': disable host key check
  # Line 'python3 -c ..': patch the buggy ray files and enable `-o allow_other` option for `goofys`
  - {%- for initial_setup_command in initial_setup_commands %}
    {{ initial_setup_command }}
    {%- endfor %}
    sudo systemctl stop unattended-upgrades || true;
    sudo systemctl disable unattended-upgrades || true;
    sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true;
    sudo kill -9 `sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1` || true;
    sudo pkill -9 apt-get;
    sudo pkill -9 dpkg;
    sudo dpkg --configure -a;
    mkdir -p ~/.ssh; touch ~/.ssh/config;
    rm ~/.local/bin/pip ~/.local/bin/pip3 ~/.local/bin/pip3.8 ~/.local/bin/pip3.10;
    {{ conda_installation_commands }}
    {{ ray_skypilot_installation_commands }}
    {{ copy_skypilot_templates_commands }}
    touch ~/.sudo_as_admin_successful;
    sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
    sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
    mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n  StrictHostKeyChecking no\n  IdentityFile ~/.ssh/sky-cluster-key\n  IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n  StrictHostKeyChecking no\n  IdentityFile ~/.ssh/sky-cluster-key\n  IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
    [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf');
    {{ ssh_max_sessions_config }}

# Command to start ray clusters are now placed in `sky.provision.instance_setup`.
# We do not need to list it here anymore.
